import requests
import lxml.etree as le
import xlrd
import xlwt.Workbook

# Fetch the tutorial page. The explicit timeout keeps the script from hanging
# indefinitely on an unresponsive server (requests applies no default timeout).
content = requests.get(
    url="https://www.runoob.com/html/html-tutorial.html",
    timeout=10,
).content
# Parse the response body into an lxml HTML element tree.
contentx = le.HTML(content)

# Scrape the link text of every <a> directly under div#leftcolumn.
rets = contentx.xpath("//div[@id='leftcolumn']/a/text()")
for category in rets:
    print(category)

# Scrape the href attribute of those same links.
html = contentx.xpath("//div[@id='leftcolumn']/a/@href")
for url in html:
    print(url)

# Save to an Excel file
def save_title(self, filename="002.xls"):
    """Write the records returned by ``self.html()`` to an ``.xls`` workbook.

    The workbook gets a single sheet named ``data``.  Row 0 holds the
    headers ``time``, ``url`` and ``title``; each following row holds the
    first three items of one record.  ``"saving done..."`` is printed after
    each record is written to the sheet.

    Args:
        self: Object exposing an ``html()`` method.  The method is assumed
            to return an iterable of records indexable at 0, 1 and 2
            (presumably time, url and title -- TODO confirm with caller).
        filename: Destination handed to ``Workbook.save``.  Defaults to
            ``"002.xls"``.
    """
    title_file = xlwt.Workbook(encoding="utf-8")
    table = title_file.add_sheet('data')

    # Header row.
    for col, header in enumerate(("time", "url", "title")):
        table.write(0, col, header)

    # Data rows start at row 1, directly below the header.
    for row, data in enumerate(self.html(), start=1):
        for col in range(3):
            table.write(row, col, data[col])
        # Must be a call: a bare `print` followed by a string literal on the
        # next line never outputs the string.
        print("saving done...")

    title_file.save(filename)  # Write the workbook out.

